Load packages

library(pheatmap)
library(stringr)
library(Biobase)
library(ggplot2)
library(Mfuzz)

load data

load("analysis/obj/palmieri_final.rda")
res.IPscr_IPinh.ext<-read.delim(file="analysis/result/conventional/res.IPscr_IPinh.txt")

preprocessing and filtering

Genes were selected based on raw p-values < 0.05

#select genes
idx.genes2cluster<-res.IPscr_IPinh.ext$P.Value<0.05

table(idx.genes2cluster)
## idx.genes2cluster
## FALSE  TRUE 
## 15950   163
#get the normalized signal intensities of all arrays
genes2cluster<-res.IPscr_IPinh.ext[idx.genes2cluster, 
            grep(".CEL", colnames(res.IPscr_IPinh.ext))]

rownames(genes2cluster)<-res.IPscr_IPinh.ext[idx.genes2cluster, "SYMBOL"]

heatmap of selected genes

condition_names <- ifelse(str_detect(pData(palmieri_final)$condition,
                                     "input"), "input", "IP")
treatment_names <- ifelse(str_detect(pData(palmieri_final)$treatment,
                                     "scr"), "scr", "inh")

annotation_for_heatmap <-data.frame(condition = condition_names,  treatment = treatment_names)
row.names(annotation_for_heatmap) <- row.names(pData(palmieri_final))

anno_colors <- list( condition= c(input = "chartreuse4", IP = "burlywood3"),
                    treatment= c(inh = "blue4", scr = "cadetblue2"))


  pheatmap(genes2cluster, annotation_col = annotation_for_heatmap,
         annotation_colors = anno_colors, scale = "row", show_rownames = F)

k-means clustering

Row-wise z-score were calculated of the selected genes and k-means clustering was applied using 10 clusters

mat.scaled<-t(apply(genes2cluster,1,scale))
colnames(mat.scaled)<-colnames(genes2cluster)

lv.samp<-colnames(mat.scaled)

i=10

#run clustering
k<-kmeans(mat.scaled, i,20)
  
#get cluster centers
    print(pheatmap(k$centers, annotation_col = annotation_for_heatmap,
                 cluster_cols=T, annotation_colors =anno_colors ))

  clust.centers<-data.frame(k$centers)
  df.cluster.centers<-reshape(clust.centers,idvar="cluster",ids=row.names(clust.centers), 
                              times=names(clust.centers), timevar="sample",varying=list(names(clust.centers)),
                              direction = "long", v.names="center")
  
  df.cluster.centers$cluster<-factor(df.cluster.centers$cluster, levels=(1:i))
  df.cluster.centers$sample<-factor(df.cluster.centers$sample,levels=names(clust.centers))
  
  p<-ggplot(df.cluster.centers, aes(x=sample, y=center, color=cluster, group=1))
  p<-p+facet_grid(cluster ~ .)
  p<-p+geom_line(lwd=2)+ theme(axis.text.x = element_text(angle = 90, hjust = 1))+
    xlab("")
  
  print(p)

  anno_row=data.frame(cluster=as.factor(k$cluster))
  

    pheatmap(mat.scaled[order(k$cluster),], cluster_cols=F, 
           cluster_rows = F,show_rownames = F,
           annotation_row =anno_row ,
           annotation_col=annotation_for_heatmap,annotation_colors = anno_colors)

fuzzy c-means clustering

selected.probes<-as.character(res.IPscr_IPinh.ext[idx.genes2cluster,"PROBEID"])
eset_final.s<-standardise(palmieri_final[selected.probes,])

## get m parameter
m1<-mestimate(eset_final.s)
m1
## [1] 2.03368
cl<-mfuzz(eset_final.s,c=14, m=m1)

#show cluster overlap
O<-overlap(cl)
ptmp<-overlap.plot(cl,over=O,thres = 0.05)

n.cluster<-nrow(cl$centers)
member.thr<-0.25
# plot settings
exprs.mat<-exprs(eset_final.s)
max.exprs<-max(exprs.mat)
min.exprs<-min(exprs.mat)


for(i in 1:n.cluster){
  #get members
  members<-cl$membership[(cl$membership[,i]>member.thr),i]
  cl.exprs<-exprs.mat[names(members),]
  plot.data<-reshape2::melt(cl.exprs)
  colnames(plot.data)<-c("gene_name","array","z_scores")
  #attach membership
  plot.data2<-cbind(plot.data,membership=members[match(plot.data$gene_name,names(members))])
  
  #high membership in the front
  plot.data2$gene_name<-factor(plot.data2$gene_name,levels = names(members)[order(members)])
  
  p<-ggplot(plot.data2, aes(x=array, y=z_scores, group=gene_name, color=membership))
  p<-p+geom_line()+theme_classic()+
    theme(axis.text.x = element_text(angle = 90, hjust = 1))+
    xlab("")+scale_color_gradientn(colors = c("yellow","greenyellow","cyan","royalblue","deeppink","red"),
                                   limits=c(0.25,1))+ggtitle(paste0("cluster",i))
  
 
    print(p)
  
  cluster.table<-cbind(exprs(palmieri_final)[rownames(cl.exprs),],
        fData(palmieri_final)[match(rownames(cl.exprs),
            fData(palmieri_final)$PROBEID), c("SYMBOL","GENENAME")],
        cluster.membership=members[rownames(cl.exprs)])

}

Session info

sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.3
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] C
## 
## attached base packages:
## [1] tcltk     parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
## [1] Mfuzz_2.45.0        DynDoc_1.63.0       widgetTools_1.63.0 
## [4] e1071_1.7-2         ggplot2_3.2.0       Biobase_2.45.0     
## [7] BiocGenerics_0.31.4 stringr_1.4.0       pheatmap_1.0.12    
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.1         plyr_1.8.4         compiler_3.6.0    
##  [4] pillar_1.4.1       RColorBrewer_1.1-2 tkWidgets_1.63.0  
##  [7] class_7.3-15       tools_3.6.0        digest_0.6.19     
## [10] evaluate_0.14      tibble_2.1.3       gtable_0.3.0      
## [13] pkgconfig_2.0.2    rlang_0.3.4        rstudioapi_0.10   
## [16] yaml_2.2.0         xfun_0.8           withr_2.1.2       
## [19] dplyr_0.8.1        knitr_1.23         grid_3.6.0        
## [22] tidyselect_0.2.5   glue_1.3.1         R6_2.4.0          
## [25] rmarkdown_1.13     reshape2_1.4.3     purrr_0.3.2       
## [28] magrittr_1.5       scales_1.0.0       htmltools_0.3.6   
## [31] assertthat_0.2.1   colorspace_1.4-1   labeling_0.3      
## [34] stringi_1.4.3      lazyeval_0.2.2     munsell_0.5.0     
## [37] crayon_1.3.4